
## V appendix2 - for ran B. - Summary Statistics BY predicted Mortality --------------------------------------- 

# Summarise a cohort by predicted-mortality bin.
#
# Rows of `data` are grouped by `prob_for_report` floored to a multiple of 0.1
# and one row of summary statistics is produced per bin.
#
# Args:
#   data: a data.table holding `prob_for_report`, `obs_uniq_ident` and the
#         DMG_*, COV_*, CHR_*, UTL_*, ACG_RUB and BT_* columns referenced below.
#
# Returns:
#   A data.table with one row per bin. `bins` is the lower bin edge; rows are in
#   order of first appearance in `data` (callers sort if they need to). Every
#   column whose name contains "pct" is expressed in percent, rounded to 1 dp.
#
# Relies on mean_with_share_zero(), mean_with_share_zero_comma() and
# mean_with_share_miss(), which are defined outside this block.
desc_stats_func_3 <- function(data) {
  # Cohort-wide denominators, computed once rather than once per bin.
  n_rows_total <- nrow(data)
  n_decedents_total <- data[DMG_died_within_365d == "1", .N]

  # Formats "<mean> (<percent of rows whose value is non-zero>)".
  mean_and_pct_nonzero <- function(x, digits) {
    paste0(
      round(mean(x, na.rm = TRUE), digits),
      " (",
      round(1 - sum(x == 0) / length(x), 3) * 100,
      ")"
    )
  }

  stats <- data[, .(
    obs_num      = scales::comma(length(unique(obs_uniq_ident))),
    sh_N         = round(.N / n_rows_total * 100, 2),
    # Share of all 1-year decedents that fall in this bin. The numerator is the
    # bin's decedents (previously the bin's total row count, which is not a
    # share of decedents and could exceed 100%).
    decedent_pct = sum(DMG_died_within_365d == "1", na.rm = TRUE) /
      n_decedents_total,
    # outcome
    died_pct_1y = mean(DMG_died_within_365d == "1"),
    died_pct_6m = mean(DMG_died_within_180d == "1"),
    died_pct_3m = mean(DMG_died_within_90d == "1"),

    # demographics
    age_avg      = scales::comma(round(mean(DMG_age), 1)),
    female_pct   = mean(DMG_gender == "F"),
    arabs_pct    = mean(DMG_clinic_ethnicity == "arab"),
    sup_ins_pct  = mean(DMG_supplementary_insurance == "1"),
    confined_pct = mean(COV_sw_confined == "1"),

    COV_sw_malignancy_active_5y_pct = mean(COV_sw_malignancy_active_5y == "1"),
    # chronic conditions
    CHR_HYPERLIPIDEMIA_pct  = mean(CHR_HYPERLIPIDEMIA == "1"),
    CHR_Hypertension_pct    = mean(CHR_Hypertension == "1"),
    CHR_ARTHROPATHY_pct     = mean(CHR_ARTHROPATHY == "1"),
    CHR_Diabetes_pct        = mean(CHR_Diabetes == "1"),
    CHR_IHD_pct             = mean(CHR_IHD == "1"),
    CHR_Arrhythmia_pct      = mean(CHR_Arrhythmia == "1"),
    CHR_Neurological_pct    = mean(CHR_Neurological == "1"),
    CHR_Kidney_pct          = mean(CHR_Kidney == "1"),
    CHR_Gastritis_pct       = mean(CHR_Gastritis == "1"),
    CHR_CRF_pct             = mean(CHR_CRF == "1"),
    CHR_OSTEOPOROSIS_pct    = mean(CHR_OSTEOPOROSIS == "1"),
    CHR_CVA_pct             = mean(CHR_CVA == "1"),
    CHR_DEPRESSION_pct      = mean(CHR_DEPRESSION == "1"),
    CHR_ValvularCardiac_pct = mean(CHR_ValvularCardiac == "1"),
    CHR_CHF_pct             = mean(CHR_CHF == "1"),
    CHR_COPD_pct            = mean(CHR_COPD == "1"),
    # prior utilisation
    UTL_l365d_drugs_count   = mean_with_share_zero(UTL_l365d_drugs_count),
    UTL_l365d_labs_count    = mean_with_share_zero(UTL_l365d_labs_count),
    UTL_l365d_imaging_count = mean_with_share_zero(UTL_l365d_imaging_count),
    # NOTE(review): this "count" adds two *_proc_count columns to two *_cost
    # columns -- confirm the day-hospital terms should not be *_count columns.
    UTL_l365d_abm_count = mean_and_pct_nonzero(
      UTL_l365d_ambuDiag_proc_count + UTL_l365d_ambuTreat_proc_count +
        UTL_l365d_dayHospSurg_cost + UTL_l365d_dayHospNonSurg_cost,
      1
    ),
    UTL_l365d_ER_count = mean_with_share_zero(UTL_l365d_ER_count),
    # NOTE(review): labelled l365d (prior year) but built from UTL_f365d_*
    # columns -- confirm which window is intended. The mean is rounded to 0 dp
    # here, unlike the 1 dp used for the ambulatory row.
    UTL_l365d_HOSP_count = mean_and_pct_nonzero(
      UTL_f365d_diff_count + UTL_f365d_hospPlanned_count +
        UTL_f365d_hospUnplanned_count,
      0
    ),
    # prior cost
    cost_1yrBef = mean_with_share_zero_comma(UTL_l365d_total_cost),
    # ACG score: shares are taken over rows whose ACG_RUB is not "Missing"
    ACG_RUB_low_pct      = sum(ACG_RUB %in% c("0", "1", "2")) /
      sum(ACG_RUB != "Missing"),
    ACG_RUB_moderate_pct = sum(ACG_RUB %in% c("3")) /
      sum(ACG_RUB != "Missing"),
    ACG_RUB_High_pct     = sum(ACG_RUB %in% c("4", "5")) /
      sum(ACG_RUB != "Missing"),
    COV_bmi_value = mean_with_share_miss(COV_bmi_value),
    COV_bp_dias   = mean_with_share_miss(COV_bp_dias_last),
    COV_bp_sys    = mean_with_share_miss(COV_bp_sys_last),
    # lab measurements
    LAB_HB_last           = mean_with_share_miss(BT_HB_last_val_num),
    LAB_HCT_last          = mean_with_share_miss(BT_HCT_HGB_last_val_num),
    LAB_RBC_last          = mean_with_share_miss(BT_RBC_last_val_num),
    LAB_PLT_last          = mean_with_share_miss(BT_PLT_last_val_num),
    LAB_NEUT_abs_EHR_last = mean_with_share_miss(BT_NEUT_abs_last_val_num),
    LAB_LYMP_abs_EHR_last = mean_with_share_miss(BT_LYMP_abs_last_val_num)
  ), by = .(bins = plyr::round_any(prob_for_report, 0.1, floor))]

  # Convert every proportion column (name contains "pct") to percent, 1 dp.
  pct_cols <- grep("pct", names(stats), value = TRUE)
  stats[, (pct_cols) := lapply(.SD, function(x) round(x * 100, 1)),
        .SDcols = pct_cols]

  # Trailing [] returns the table visibly; `:=` alone returns it invisibly.
  stats[]
}


# Per-bin summary statistics for test_dt_withProb_all.
unif_desc_stats_all_p <- desc_stats_func_3(test_dt_withProb_all)

# Grouped output arrives in order of first appearance; sort by the numeric bin
# edge (before it is turned into a label) so the table columns run from the
# lowest to the highest predicted risk.
setorder(unif_desc_stats_all_p, bins)

# Replace the numeric lower edge with an interval label such as "[0.1,0.2)".
unif_desc_stats_all_p[, bins := paste0("[", bins, ",", bins + 0.1, ")")]

# Transpose: one row per statistic, one column per bin. The first row of the
# transposed table holds the bin labels and becomes the column names.
unif_desc_stats_all_p_t <-
  as.data.table(t(unif_desc_stats_all_p), keep.rownames = TRUE)

names(unif_desc_stats_all_p_t) <- paste0(unif_desc_stats_all_p_t[1, ])

# Row labels for the LaTeX table, in the same order as the statistics returned
# by desc_stats_func_3() (47 rows, matching sum(n.rgroup) below).
names_table_1detailed_bins <-
  c("Number of beneficiaries","Share of population (\\%)","Share of decedent (\\%)",
    "1-year all-cause mortality (\\%)","6-months all-cause mortality (\\%)","3-months all-cause mortality (\\%)",
    "Age (mean) (minimum = 25) (y)", "Sex (\\% Female)", "Ethnicity (\\% Arabs)",
    "Supplementary Insurance (\\%)","Disability (\\%)",
    "Active malignancy (last 5 years - \\%)",
    "Hyperlipidemia", "Hypertension", "Arthropathy" ,"Diabetes",
    "IHD","Arrhythmia","Neurological","Kidney","Gastritis","CRF",
    "Osteoporosis","CVA","Depression","Valvular Cardiac","CHF","COPD",
    "Prescription Drugs", "Laboratory Tests", "Imaging Events",
    "Ambulatory encounters", "Emergency Room visits", "Hospital visits",
    "Total spending (NIS)",
    "Healthy or low","Moderate","High or very high",
    "BMI", "Diastolic blood pressure (mm Hg)", "Systolic blood pressure (mm Hg)",
    "Hemoglobin (g/dL)", "Hematocrit, (\\%)", "Red blood cells",
    "Platelets  (1000/uL)","Neutrophiles" , "Lymphocytes")

# Number of bin columns actually present (the first column holds the original
# statistic names and is dropped below). Column heads and justification are
# derived from it so they always match the data instead of assuming a fixed
# number of bins.
n_bins_all <- ncol(unif_desc_stats_all_p_t) - 1L

# Drop the bin-label row and the statistic-name column, then write the table.
# NOTE(review): per the Hmisc docs col.just appears to describe data columns
# only (row labels use rowlabel.just), so the leading "l" would left-justify
# the first bin column -- confirm this is intended.
invisible(Hmisc::latex(
  unif_desc_stats_all_p_t[-c(1), -1],
  file = "desc_stats_phat_bins.tex",
  center = 'centering',
  n.rgroup = c(3, 3, 5, 1, 16, 6, 1, 3, 9),
  rgroup = c("Sample Size", "Mortality rates", "Demographics","Cancer History",
             "Chronic Conditions, (\\%)",
             "Prior Utilization, mean 1yr count (\\% non zero)",
             "Prior Utilization, mean 1yr cost (\\% non zero)",
             "ACG Score,*",
             "Last Clinical Measurements, mean (\\% non missing)"),
  rowname = names_table_1detailed_bins,
  rowlabel = "Predicted Mortality Risk",
  col.just = c("l", rep.int("r", max(n_bins_all - 1L, 0L))),
  extracolheads = paste0("(", seq_len(n_bins_all), ")"),
  na.blank = TRUE,
  extracolsize = "normalsize"
))



# Per-bin summary statistics for test_dt_withProb_cancer.
unif_desc_stats_cancer_p <- desc_stats_func_3(test_dt_withProb_cancer)

# Sort by the numeric bin edge so the table columns run from the lowest to the
# highest predicted risk (must happen before the edge becomes a text label).
setorder(unif_desc_stats_cancer_p, bins)

# Replace the numeric lower edge with an interval label such as "[0.1,0.2)".
unif_desc_stats_cancer_p[, bins := paste0("[", bins, ",", bins + 0.1, ")")]

# Transpose: one row per statistic, one column per bin. The first row of the
# transposed table holds the bin labels and becomes the column names.
unif_desc_stats_cancer_p_t <-
  as.data.table(t(unif_desc_stats_cancer_p), keep.rownames = TRUE)

names(unif_desc_stats_cancer_p_t) <- paste0(unif_desc_stats_cancer_p_t[1, ])

# Number of bin columns actually present (the first column holds the original
# statistic names and is dropped below). With 10 bins this reproduces the
# previously hard-coded column heads and justification exactly.
n_bins_cancer <- ncol(unif_desc_stats_cancer_p_t) - 1L

# Drop the bin-label row and the statistic-name column, then write the table.
# Row labels come from names_table_1detailed_bins, defined earlier in the file.
invisible(Hmisc::latex(
  unif_desc_stats_cancer_p_t[-c(1), -1],
  file = "desc_stats_phat_bins_cancer.tex",
  center = 'centering',
  n.rgroup = c(3, 3, 5, 1, 16, 6, 1, 3, 9),
  rgroup = c("Sample Size", "Mortality rates", "Demographics","Cancer History",
             "Chronic Conditions, (\\%)",
             "Prior Utilization, mean 1yr count (\\% non zero)",
             "Prior Utilization, mean 1yr cost (\\% non zero)",
             "ACG Score,*",
             "Last Clinical Measurements, mean (\\% non missing)"),
  rowname = names_table_1detailed_bins,
  rowlabel = "Predicted Mortality Risk",
  col.just = c("l", rep.int("r", max(n_bins_cancer - 1L, 0L))),
  extracolheads = paste0("(", seq_len(n_bins_cancer), ")"),
  na.blank = TRUE,
  extracolsize = "normalsize"
))
